#!/bin/sh
# Both positional arguments (document, separator) are mandatory.
if [ "$#" -lt 2 ]; then
    # A usage error is a failure: report it on stderr and exit non-zero so
    # that callers and pipelines do not mistake it for a successful run.
    echo "usage: statTermFrequency.sh document separator" >&2
    exit 1
fi


# Positional arguments, each with a fallback used only when it is absent:
#   $1 -> filename   (document to read)
#   $2 -> seperator  (field separator; the misspelled name is kept because
#                     later code reads this variable)
# ${N-default} substitutes only when the parameter is unset, so an argument
# that was passed explicitly -- even an empty string -- is kept as-is.
# NOTE(review): the argument-count check earlier in the script exits when
# fewer than two arguments are given, so these fallbacks are currently never
# reached -- confirm which of the two behaviours is intended.
filename=${1-"/data/lvhongliang/askrelated/term_count_new_selected/ask_questions_baby_0y1m"}
seperator=${2-" "}
dirPath="../data/lsa/"

# remove_stale_stats DIR_PREFIX
#
# Deletes userIdStat.txt and itemIdStat.txt located under DIR_PREFIX.
# DIR_PREFIX is prepended verbatim, so it has to end in "/".
# Files that do not exist are silently skipped.
remove_stale_stats() {
    # Act on the very paths that are meant to disappear. Checking for a
    # same-named file in the current directory instead would leave stale
    # copies behind, or make rm fail on a target that is not there.
    rm -f -- "${1}userIdStat.txt" "${1}itemIdStat.txt"
}

remove_stale_stats "$dirPath"

# count_term_frequency DOCUMENT SEPARATOR
#
# Prints one "term count" line per distinct term found in DOCUMENT.
# Every input line is split on SEPARATOR; its second field is treated as a
# whitespace-separated list of tokens, and the part of each token in front
# of the first "@" is the term that gets counted.
# Output order follows awk's for-in traversal and is therefore unspecified.
#
# Returns awk's exit status, or 1 (without running awk) if DOCUMENT is empty.
count_term_frequency() {
    # An empty operand names no file; awk implementations may skip it and
    # wait on standard input instead, so reject it explicitly.
    if [ -z "$1" ]; then
        echo "statTermFrequency.sh: no input document given" >&2
        return 1
    fi

    # -F receives the separator as its own argument so that an empty value
    # cannot make awk swallow the program text as the separator. DOCUMENT is
    # quoted so that paths with whitespace or glob characters stay one operand.
    awk -F "$2" '
    {
        tokenCount = split($2, tokens, " ");
        for (i = 1; i <= tokenCount; i++) {
            split(tokens[i], parts, "@");
            # Incrementing an element that does not exist yet starts it at 1.
            frequency[parts[1]]++;
        }
    }
    END {
        for (term in frequency) {
            print term, frequency[term];
        }
    }' "$1"
}

count_term_frequency "$filename" "$seperator"
